import os
import pandas as pd

from scraper.hk_stock_dividend import fetch_dividend_table, process_dividend_data


def hk_stock_dividend_batch(
    base_dir="../data/hk_stock_dividend",
    input_filename="SCHI_20250707.xlsx",
    output_dir=None,
):
    """Fetch, process and save dividend data for every stock code in a spreadsheet.

    The spreadsheet is read with ``pd.read_excel``; its column names are
    stripped of surrounding whitespace and the ``股份代号`` column supplies the
    stock codes. For each code the function:

    1. calls ``fetch_dividend_table(code)`` and writes the result to
       ``<code>_dividend_raw.csv``;
    2. calls ``process_dividend_data`` on that result, adds a ``stock_code``
       column, and writes it to ``<code>_dividend_processed.csv``.

    Finally all processed frames are concatenated and written to
    ``all_hk_stock_dividend.csv``.

    Args:
        base_dir: Directory containing the input spreadsheet. Relative paths
            are resolved against the current working directory.
        input_filename: Name of the Excel file inside ``base_dir``.
        output_dir: Directory for all CSV output. ``None`` (the default)
            means "same as ``base_dir``". It is created if missing.

    Returns:
        The concatenated processed data as a DataFrame. If the spreadsheet
        lists no stock codes, an empty DataFrame is returned and no merged
        file is written.

    Raises:
        KeyError: If the spreadsheet has no ``股份代号`` column.
        Any exception raised by ``pd.read_excel``, ``fetch_dividend_table`` or
        ``process_dividend_data`` propagates unchanged and aborts the batch.
    """
    input_file = os.path.join(base_dir, input_filename)
    if output_dir is None:
        output_dir = base_dir

    # Read the stock codes. Header cells may carry stray whitespace, so the
    # column names are normalised before the lookup.
    df = pd.read_excel(input_file)
    df.columns = df.columns.str.strip()
    # NOTE(review): if Excel stores the codes as numbers rather than text,
    # astype(str) yields e.g. "700" (or "700.0" when the column contains
    # blanks) instead of a zero-padded code -- confirm what
    # fetch_dividend_table expects.
    codes = df["股份代号"].astype(str)

    # Create the output directory only after the input was read successfully,
    # so a wrong input path does not leave an empty directory behind.
    os.makedirs(output_dir, exist_ok=True)

    processed_data_list = []

    for code in codes:
        print(f"正在抓取 {code} 的分红数据...")
        # Fetch the dividend data for this stock.
        dividend_data = fetch_dividend_table(code)
        # Keep a copy of the raw data exactly as fetched.
        raw_file = os.path.join(output_dir, f"{code}_dividend_raw.csv")
        pd.DataFrame(dividend_data).to_csv(raw_file, index=False, encoding="utf-8-sig")
        # Process the raw data; the result is presumably a DataFrame, since
        # it is assigned a column and written with to_csv below.
        processed_df = process_dividend_data(dividend_data)
        # Tag every row with the stock it belongs to so the merged file
        # stays attributable.
        processed_df["stock_code"] = code
        # Save the per-stock processed data.
        processed_file = os.path.join(output_dir, f"{code}_dividend_processed.csv")
        processed_df.to_csv(processed_file, index=False, encoding="utf-8-sig")
        processed_data_list.append(processed_df)

    # pd.concat raises ValueError on an empty list, so an input without any
    # stock codes is handled explicitly.
    if not processed_data_list:
        print("未获取到任何分红数据，跳过合并。")
        return pd.DataFrame()

    # Merge the dividend data of all stocks into one file.
    all_dividend_df = pd.concat(processed_data_list, ignore_index=True)
    all_dividend_df.to_csv(
        os.path.join(output_dir, "all_hk_stock_dividend.csv"),
        index=False,
        encoding="utf-8-sig",
    )
    print("全部分红数据已合并保存。")
    return all_dividend_df


if __name__ == "__main__":
    # Run the batch job when this file is executed as a script. The returned
    # DataFrame is bound to `df` but not used further here.
    df = hk_stock_dividend_batch()
